NumPy

NumPy란

NumPy Array


In [1]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
a = np.array([0, 1, 2, 3])
a


Out[1]:
array([0, 1, 2, 3])

Python List vs NumPy Array

  • Python List
    • 여러가지 타입의 원소
    • 메모리 용량이 크고 속도가 느림
    • nesting 가능
    • 전체 연산 불가
  • NumPy Array
    • 동일 타입의 원소
    • 메모리 최적화, 계산 속도 향상
    • 크기(dimension)가 명확하게 정의
    • 전체 연산 가능

In [2]:
L = range(1000)
%timeit [i**2 for i in L]


The slowest run took 4.21 times longer than the fastest. This could mean that an intermediate result is being cached.
1000 loops, best of 3: 93.3 µs per loop

In [3]:
a = np.arange(1000)
%timeit a**2


The slowest run took 45.14 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 2.57 µs per loop

In [4]:
# Build a real list: the next cells rely on list semantics (list * int repeats
# the list), which a Python 3 range object does not provide.
L = list(range(3))
L


Out[4]:
[0, 1, 2]

In [5]:
L * 2


Out[5]:
[0, 1, 2, 0, 1, 2]

In [6]:
[i * 2 for i in L]


Out[6]:
[0, 2, 4]

In [7]:
a = np.arange(3)
a


Out[7]:
array([0, 1, 2])

In [8]:
a * 2


Out[8]:
array([0, 2, 4])

Create Array (1D)


In [9]:
a = np.array([0, 1, 2, 3])
a


Out[9]:
array([0, 1, 2, 3])

In [10]:
a.ndim


Out[10]:
1

In [11]:
a.shape


Out[11]:
(4,)

In [12]:
len(a)


Out[12]:
4

Create Array (2D)


In [13]:
b = np.array([[0, 1, 2], 
              [3, 4, 5]])    # 2 x 3 array
b


Out[13]:
array([[0, 1, 2],
       [3, 4, 5]])

In [14]:
b.ndim


Out[14]:
2

In [15]:
b.shape


Out[15]:
(2, 3)

In [16]:
len(b)


Out[16]:
2

In [17]:
a2 = np.array([[0, 1, 2, 3]]).T
a2


Out[17]:
array([[0],
       [1],
       [2],
       [3]])

In [18]:
a3 = np.array([[0], [1], [2], [3]])
a3


Out[18]:
array([[0],
       [1],
       [2],
       [3]])

In [19]:
a2.shape


Out[19]:
(4, 1)

Create Array (3D)


In [20]:
c = np.array([[[1,2], 
               [3,4]], 
              [[5,6], 
               [7,8]]])
c


Out[20]:
array([[[1, 2],
        [3, 4]],

       [[5, 6],
        [7, 8]]])

In [21]:
c.ndim


Out[21]:
3

In [22]:
c.shape


Out[22]:
(2, 2, 2)

In [23]:
len(c)


Out[23]:
2

1 dim vs 2 dim


In [24]:
a = np.arange(4)
a


Out[24]:
array([0, 1, 2, 3])

In [25]:
a.shape


Out[25]:
(4,)

In [26]:
b = np.array([[0, 1, 2, 3]])
b


Out[26]:
array([[0, 1, 2, 3]])

In [27]:
b.shape


Out[27]:
(1, 4)

In [28]:
c = np.array([[0], [1], [2], [3]])
c


Out[28]:
array([[0],
       [1],
       [2],
       [3]])

In [29]:
c.shape


Out[29]:
(4, 1)

Transpose


In [30]:
a = np.array([[0, 1, 2, 3]])
a


Out[30]:
array([[0, 1, 2, 3]])

In [31]:
a.shape


Out[31]:
(1, 4)

In [32]:
b = a.T
b


Out[32]:
array([[0],
       [1],
       [2],
       [3]])

In [33]:
b.shape


Out[33]:
(4, 1)

Array Creation Functions

  • arange
  • linspace, logspace
  • zeros, ones
  • rand, randn
  • tile

In [34]:
a = np.arange(10) # 0 .. n-1  (!)
a


Out[34]:
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [35]:
b = np.arange(1, 9, 2) # start, end (exclusive), step
b


Out[35]:
array([1, 3, 5, 7])

In [36]:
c = np.linspace(0, 1, 6)   # start, end, num-points
c


Out[36]:
array([ 0. ,  0.2,  0.4,  0.6,  0.8,  1. ])

In [37]:
d = np.linspace(0, 1, 5, endpoint=False)
d


Out[37]:
array([ 0. ,  0.2,  0.4,  0.6,  0.8])

In [38]:
a = np.ones((3, 3))  # reminder: (3, 3) is a tuple
a


Out[38]:
array([[ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.]])

In [39]:
b = np.zeros((2, 2))
b


Out[39]:
array([[ 0.,  0.],
       [ 0.,  0.]])

In [40]:
c = np.diag([1,2,3])
c


Out[40]:
array([[1, 0, 0],
       [0, 2, 0],
       [0, 0, 3]])

In [41]:
d = np.eye(4)
d


Out[41]:
array([[ 1.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.],
       [ 0.,  0.,  1.,  0.],
       [ 0.,  0.,  0.,  1.]])

In [42]:
a = np.array([0, 1, 2])
a


Out[42]:
array([0, 1, 2])

In [43]:
np.tile(a, 2)


Out[43]:
array([0, 1, 2, 0, 1, 2])

In [44]:
np.tile(a, (3, 2))


Out[44]:
array([[0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2]])

In [45]:
np.tile(a, (2, 1, 2))


Out[45]:
array([[[0, 1, 2, 0, 1, 2]],

       [[0, 1, 2, 0, 1, 2]]])

In [46]:
b = np.array([[1, 2], [3, 4]])
b


Out[46]:
array([[1, 2],
       [3, 4]])

In [47]:
np.tile(b, 2)


Out[47]:
array([[1, 2, 1, 2],
       [3, 4, 3, 4]])

In [48]:
np.tile(b, (2, 1))


Out[48]:
array([[1, 2],
       [3, 4],
       [1, 2],
       [3, 4]])

Shape Change

  • reshape
  • flatten, ravel

In [49]:
a = np.arange(20)
a


Out[49]:
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

In [50]:
b = np.reshape(a, (4, 5))
b


Out[50]:
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

In [51]:
c = a.reshape(4,5)
c


Out[51]:
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])
  • 인수가 -1
    • numpy가 나머지 인수들을 이용하여 사이즈를 맞춘다.

In [52]:
a = np.arange(24)
a.reshape(2, 12)


Out[52]:
array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]])

In [53]:
a.reshape(2, -1)


Out[53]:
array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]])

In [54]:
a.reshape(-1, 12)


Out[54]:
array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]])

In [55]:
c


Out[55]:
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

In [56]:
d = c.flatten() # return a copy
d


Out[56]:
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

In [57]:
d.base is None


Out[57]:
True

In [58]:
e = c.ravel()  # unlike flatten() above, no copy is made here: e.base (next cell) is not None
e


Out[58]:
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

In [59]:
e.base


Out[59]:
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

Stack

  • hstack
  • vstack
  • dstack

In [60]:
a = np.arange(5)
a


Out[60]:
array([0, 1, 2, 3, 4])

In [61]:
np.vstack([a * 10, a * 20])


Out[61]:
array([[ 0, 10, 20, 30, 40],
       [ 0, 20, 40, 60, 80]])

In [62]:
b = np.arange(5)[:, np.newaxis]
b


Out[62]:
array([[0],
       [1],
       [2],
       [3],
       [4]])

In [63]:
np.hstack([b * 10, b * 20])


Out[63]:
array([[ 0,  0],
       [10, 20],
       [20, 40],
       [30, 60],
       [40, 80]])

In [64]:
a = np.array((1,2,3))
b = np.array((2,3,4))

In [65]:
a


Out[65]:
array([1, 2, 3])

In [66]:
a.shape


Out[66]:
(3,)

In [67]:
np.dstack((a,b))


Out[67]:
array([[[1, 2],
        [2, 3],
        [3, 4]]])

In [68]:
a = np.array([[1],[2],[3]])
b = np.array([[2],[3],[4]])
np.dstack((a,b))


Out[68]:
array([[[1, 2]],

       [[2, 3]],

       [[3, 4]]])

dtype

  • bool Boolean (True or False) stored as a byte
  • int8 Byte (-128 to 127)
  • int16 Integer (-32768 to 32767)
  • int32 Integer (-2147483648 to 2147483647)
  • int64 Integer (-9223372036854775808 to 9223372036854775807)
  • uint8 Unsigned integer (0 to 255)
  • uint16 Unsigned integer (0 to 65535)
  • uint32 Unsigned integer (0 to 4294967295)
  • uint64 Unsigned integer (0 to 18446744073709551615)
  • float16 Half precision float: sign bit, 5 bits exponent, 10 bits mantissa
  • float32 Single precision float: sign bit, 8 bits exponent, 23 bits mantissa
  • float64 Double precision float: sign bit, 11 bits exponent, 52 bits mantissa
  • S String

In [69]:
a = np.array([1, 2, 3])
a.dtype


Out[69]:
dtype('int64')

In [70]:
b = np.array([1., 2., 3.])
b.dtype


Out[70]:
dtype('float64')

In [71]:
c = np.array([1, 2, 3], dtype=np.float64)
c.dtype


Out[71]:
dtype('float64')

In [72]:
d = np.array([1+2j, 3+4j, 5+6*1j])
d.dtype


Out[72]:
dtype('complex128')

In [73]:
e = np.array([True, False, False, True])
e.dtype


Out[73]:
dtype('bool')

In [74]:
f = np.array(['Bonjour', 'Hello', 'Hallo',])
f.dtype


Out[74]:
dtype('S7')
  • NaN Not a Number
  • Inf Infinity

In [75]:
# Use float operands so "/" is true division regardless of the Python version
# (integer arrays would be floor-divided under Python 2 without
# `from __future__ import division`), and silence the expected
# divide-by-zero / invalid-value RuntimeWarnings for this demonstration.
with np.errstate(divide='ignore', invalid='ignore'):
    x = np.array([1., -1., 0.]) / np.array([0., 0., 0.])
x


Out[75]:
array([ inf, -inf,  nan])

In [76]:
x[0]


Out[76]:
inf

In [77]:
np.inf, np.nan


Out[77]:
(inf, nan)

Indexing


In [78]:
a = np.arange(10)
a


Out[78]:
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [79]:
a[0], a[2], a[-1]


Out[79]:
(0, 2, 9)

In [80]:
a[::-1]


Out[80]:
array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

Multi-dimensional Indexing


In [81]:
l = [[0,0,0],[0,1,0],[0,0,2]]

In [82]:
l[1]


Out[82]:
[0, 1, 0]

In [83]:
l[1][1]


Out[83]:
1

In [84]:
a = np.diag(np.arange(3))
a


Out[84]:
array([[0, 0, 0],
       [0, 1, 0],
       [0, 0, 2]])

In [85]:
a[1, 1]


Out[85]:
1

In [86]:
a[2, 1] = 10 # third line, second column
a


Out[86]:
array([[ 0,  0,  0],
       [ 0,  1,  0],
       [ 0, 10,  2]])

In [87]:
a[2] = [10, 20, 30]
a


Out[87]:
array([[ 0,  0,  0],
       [ 0,  1,  0],
       [10, 20, 30]])

Slicing


In [88]:
a = np.arange(10)
a


Out[88]:
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [89]:
a[2:9:3] # [start:end:step]


Out[89]:
array([2, 5, 8])

In [90]:
a[:4]


Out[90]:
array([0, 1, 2, 3])

In [91]:
a[1:3]


Out[91]:
array([1, 2])

In [92]:
a[::2]


Out[92]:
array([0, 2, 4, 6, 8])

In [93]:
a[3:]


Out[93]:
array([3, 4, 5, 6, 7, 8, 9])

Multi-dimensional Slicing


In [94]:
# Column vector of row offsets (6, 1) plus row vector of column indices (6,)
# broadcasts to a 6 x 6 grid where a[i, j] == 10 * i + j.
a = np.arange(6) + (np.arange(6) * 10)[:, np.newaxis]
a


Out[94]:
array([[ 0,  1,  2,  3,  4,  5],
       [10, 11, 12, 13, 14, 15],
       [20, 21, 22, 23, 24, 25],
       [30, 31, 32, 33, 34, 35],
       [40, 41, 42, 43, 44, 45],
       [50, 51, 52, 53, 54, 55]])

In [95]:
a[0,:]


Out[95]:
array([0, 1, 2, 3, 4, 5])

In [96]:
a[:,0]


Out[96]:
array([ 0, 10, 20, 30, 40, 50])

newaxis

  • 차원 확장

In [97]:
a = np.arange(4)
a


Out[97]:
array([0, 1, 2, 3])

In [98]:
a.shape


Out[98]:
(4,)

In [99]:
b = np.arange(4).reshape(4,1)
b


Out[99]:
array([[0],
       [1],
       [2],
       [3]])

In [100]:
b.shape


Out[100]:
(4, 1)

In [101]:
c = np.arange(4)[:, np.newaxis]
c


Out[101]:
array([[0],
       [1],
       [2],
       [3]])

In [102]:
c.shape


Out[102]:
(4, 1)

View

  • A slicing operation creates a view on the original array, which is just a way of accessing array data.
  • Thus the original array is not copied in memory.

In [103]:
a = np.arange(10)
a


Out[103]:
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [104]:
b = a[::2]
b


Out[104]:
array([0, 2, 4, 6, 8])

In [105]:
a[0] = 99
a


Out[105]:
array([99,  1,  2,  3,  4,  5,  6,  7,  8,  9])

In [106]:
b


Out[106]:
array([99,  2,  4,  6,  8])

Copy


In [107]:
a = np.arange(5)
a


Out[107]:
array([0, 1, 2, 3, 4])

In [108]:
b = a.copy()
b


Out[108]:
array([0, 1, 2, 3, 4])

In [109]:
a[0] = 99
a


Out[109]:
array([99,  1,  2,  3,  4])

In [110]:
b


Out[110]:
array([0, 1, 2, 3, 4])

Fancy indexing 팬시 인덱싱

  • Boolean Fancy Indexing
    • True인 원소만 선택
    • 크기가 같아야 한다.
  • list
    • 또는 tuple, or array
    • 지정된 인덱스만 선택
    • 크기가 달라도 된다.
  • multi dimension에도 사용 가능
  • create copy, not view

In [111]:
a = np.arange(20)
a


Out[111]:
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

In [112]:
a % 2


Out[112]:
array([0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1])

In [113]:
idx = (a % 2) == 0
idx


Out[113]:
array([ True, False,  True, False,  True, False,  True, False,  True,
       False,  True, False,  True, False,  True, False,  True, False,
        True, False], dtype=bool)

In [114]:
a[idx]


Out[114]:
array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [115]:
a[(a % 2) == 0]


Out[115]:
array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [116]:
a = np.arange(50) * 10
a


Out[116]:
array([  0,  10,  20,  30,  40,  50,  60,  70,  80,  90, 100, 110, 120,
       130, 140, 150, 160, 170, 180, 190, 200, 210, 220, 230, 240, 250,
       260, 270, 280, 290, 300, 310, 320, 330, 340, 350, 360, 370, 380,
       390, 400, 410, 420, 430, 440, 450, 460, 470, 480, 490])

In [117]:
idx = [1, 3, 4, -1, 30]
a[idx]


Out[117]:
array([ 10,  30,  40, 490, 300])

In [118]:
a[[1,3,4,-1,30]]


Out[118]:
array([ 10,  30,  40, 490, 300])

In [119]:
a = np.arange(6) + (np.arange(6) * 10)[:, np.newaxis]
a


Out[119]:
array([[ 0,  1,  2,  3,  4,  5],
       [10, 11, 12, 13, 14, 15],
       [20, 21, 22, 23, 24, 25],
       [30, 31, 32, 33, 34, 35],
       [40, 41, 42, 43, 44, 45],
       [50, 51, 52, 53, 54, 55]])

In [120]:
a[[0,1,2,3,4],(1,2,3,4,5)]


Out[120]:
array([ 1, 12, 23, 34, 45])

In [121]:
a[3:, [0,2,5]]


Out[121]:
array([[30, 32, 35],
       [40, 42, 45],
       [50, 52, 55]])

Array Operation

Elementwise operations


In [122]:
a = np.array([1, 2, 3, 4])
a


Out[122]:
array([1, 2, 3, 4])

In [123]:
a + 1


Out[123]:
array([2, 3, 4, 5])

In [124]:
2**a


Out[124]:
array([ 2,  4,  8, 16])

In [125]:
b = np.ones(4) + 1
b


Out[125]:
array([ 2.,  2.,  2.,  2.])

In [126]:
a - b


Out[126]:
array([-1.,  0.,  1.,  2.])

In [127]:
a + b


Out[127]:
array([ 3.,  4.,  5.,  6.])

In [128]:
c = np.ones((3, 3))
c


Out[128]:
array([[ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.]])

In [129]:
c * c # element-wise, NOT Matrix product


Out[129]:
array([[ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.]])

In [130]:
c.dot(c) # matrix product


Out[130]:
array([[ 3.,  3.,  3.],
       [ 3.,  3.,  3.],
       [ 3.,  3.,  3.]])

In [131]:
a = np.array([1, 2, 3, 4])
b = np.array([4, 2, 2, 4])

In [132]:
a == b


Out[132]:
array([False,  True, False,  True], dtype=bool)

In [133]:
a > b


Out[133]:
array([False, False,  True, False], dtype=bool)

In [134]:
a = np.array([1, 2, 3, 4])
b = np.array([4, 2, 2, 4])
c = np.array([1, 2, 3, 4])

In [135]:
np.array_equal(a, b)


Out[135]:
False

In [136]:
np.array_equal(a, c)


Out[136]:
True

In [137]:
a = np.arange(5)

In [138]:
np.sin(a)


Out[138]:
array([ 0.        ,  0.84147098,  0.90929743,  0.14112001, -0.7568025 ])

In [139]:
np.log(a)


Out[139]:
array([       -inf,  0.        ,  0.69314718,  1.09861229,  1.38629436])

In [140]:
np.exp(a)


Out[140]:
array([  1.        ,   2.71828183,   7.3890561 ,  20.08553692,  54.59815003])

In [141]:
np.log10(a)


Out[141]:
array([       -inf,  0.        ,  0.30103   ,  0.47712125,  0.60205999])

In [142]:
a = np.arange(4)
b = np.array([1, 2])

In [143]:
a


Out[143]:
array([0, 1, 2, 3])

In [144]:
b


Out[144]:
array([1, 2])

In [145]:
a + b


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-145-f96fb8f649b6> in <module>()
----> 1 a + b

ValueError: operands could not be broadcast together with shapes (4,) (2,) 

Dimension Reduction Operation

  • sum
  • min, max, argmin, argmax
  • mean, median, std, var
  • all, any

In [147]:
x = np.array([1, 2, 3, 4])
x


Out[147]:
array([1, 2, 3, 4])

In [148]:
np.sum(x)


Out[148]:
10

In [149]:
x.sum()


Out[149]:
10

In [150]:
x = np.array([[1, 1], [2, 2]])
x


Out[150]:
array([[1, 1],
       [2, 2]])

<img src="http://www.scipy-lectures.org/_images/reductions.png" style="width: 20%; margin: 0 auto 0 auto;">


In [151]:
x.sum()


Out[151]:
6

In [152]:
x.sum(axis=0)   # columns (first dimension)


Out[152]:
array([3, 3])

In [153]:
x.sum(axis=1)   # rows (second dimension)


Out[153]:
array([2, 4])

In [154]:
x = np.array([1, 3, 2])

In [155]:
x.min()


Out[155]:
1

In [156]:
x.max()


Out[156]:
3

In [157]:
x.argmin()  # index of minimum


Out[157]:
0

In [158]:
x.argmax()  # index of maximum


Out[158]:
1

In [159]:
np.all([True, True, False])


Out[159]:
False

In [160]:
np.any([True, True, False])


Out[160]:
True

In [161]:
# np.int was only an alias for the builtin int; it was deprecated in
# NumPy 1.20 and removed in 1.24, so pass the builtin type directly.
a = np.zeros((100, 100), dtype=int)
a


Out[161]:
array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ..., 
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [162]:
np.any(a != 0)


Out[162]:
False

In [163]:
np.all(a == a)


Out[163]:
True

In [164]:
a = np.array([1, 2, 3, 2])
b = np.array([2, 2, 3, 2])
c = np.array([6, 4, 4, 5])

In [165]:
((a <= b) & (b <= c)).all()


Out[165]:
True

In [166]:
x = np.array([1, 2, 3, 1])
y = np.array([[1, 2, 3], [5, 6, 1]])

In [167]:
x.mean()


Out[167]:
1.75

In [168]:
np.median(x)


Out[168]:
1.5

In [169]:
np.median(y, axis=-1) # last axis


Out[169]:
array([ 2.,  5.])

In [170]:
x.std()          # full population standard dev.


Out[170]:
0.82915619758884995

Broadcasting


In [171]:
a = np.tile(np.arange(0, 40, 10), (3, 1)).T
a


Out[171]:
array([[ 0,  0,  0],
       [10, 10, 10],
       [20, 20, 20],
       [30, 30, 30]])

In [172]:
b = np.array([0, 1, 2])
b


Out[172]:
array([0, 1, 2])

In [173]:
a + b


Out[173]:
array([[ 0,  1,  2],
       [10, 11, 12],
       [20, 21, 22],
       [30, 31, 32]])

In [174]:
a[:,0][:, np.newaxis]


Out[174]:
array([[ 0],
       [10],
       [20],
       [30]])

In [175]:
a[:,0][:, np.newaxis] + b


Out[175]:
array([[ 0,  1,  2],
       [10, 11, 12],
       [20, 21, 22],
       [30, 31, 32]])

In [176]:
a = np.ones((4, 5))
a


Out[176]:
array([[ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.]])

In [177]:
a[0]


Out[177]:
array([ 1.,  1.,  1.,  1.,  1.])

In [178]:
a[0] = 2
a


Out[178]:
array([[ 2.,  2.,  2.,  2.,  2.],
       [ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.]])

In [179]:
# x is a flat (5,) array and y a (5, 1) column; combining them below broadcasts to (5, 5).
x, y = np.arange(5), np.arange(5)[:, np.newaxis]

In [180]:
x


Out[180]:
array([0, 1, 2, 3, 4])

In [181]:
y


Out[181]:
array([[0],
       [1],
       [2],
       [3],
       [4]])

In [182]:
distance = np.sqrt(x ** 2 + y ** 2)
distance


Out[182]:
array([[ 0.        ,  1.        ,  2.        ,  3.        ,  4.        ],
       [ 1.        ,  1.41421356,  2.23606798,  3.16227766,  4.12310563],
       [ 2.        ,  2.23606798,  2.82842712,  3.60555128,  4.47213595],
       [ 3.        ,  3.16227766,  3.60555128,  4.24264069,  5.        ],
       [ 4.        ,  4.12310563,  4.47213595,  5.        ,  5.65685425]])

ogrid, mgrid, meshgrid


In [183]:
x, y = np.ogrid[0:3, 0:5]

In [184]:
x


Out[184]:
array([[0],
       [1],
       [2]])

In [185]:
y


Out[185]:
array([[0, 1, 2, 3, 4]])

In [186]:
np.ogrid[-1:1:3j, -1:1:5j]


Out[186]:
[array([[-1.],
        [ 0.],
        [ 1.]]), array([[-1. , -0.5,  0. ,  0.5,  1. ]])]

In [187]:
x, y = np.mgrid[0:3, 0:5]

In [188]:
x


Out[188]:
array([[0, 0, 0, 0, 0],
       [1, 1, 1, 1, 1],
       [2, 2, 2, 2, 2]])

In [189]:
y


Out[189]:
array([[0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4]])

In [190]:
np.mgrid[-1:1:3j, -1:1:5j]


Out[190]:
array([[[-1. , -1. , -1. , -1. , -1. ],
        [ 0. ,  0. ,  0. ,  0. ,  0. ],
        [ 1. ,  1. ,  1. ,  1. ,  1. ]],

       [[-1. , -0.5,  0. ,  0.5,  1. ],
        [-1. , -0.5,  0. ,  0.5,  1. ],
        [-1. , -0.5,  0. ,  0.5,  1. ]]])

In [191]:
X, Y = np.meshgrid(np.arange(3), np.arange(5))

In [192]:
X


Out[192]:
array([[0, 1, 2],
       [0, 1, 2],
       [0, 1, 2],
       [0, 1, 2],
       [0, 1, 2]])

In [193]:
Y


Out[193]:
array([[0, 0, 0],
       [1, 1, 1],
       [2, 2, 2],
       [3, 3, 3],
       [4, 4, 4]])

In [194]:
# zip() is lazy on Python 3, so wrap it in list() to actually display the
# (x, y) pairs; tolist() yields plain Python ints so the output reads the same
# on every NumPy version.
list(zip(X.ravel().tolist(), Y.ravel().tolist()))


Out[194]:
[(0, 0),
 (1, 0),
 (2, 0),
 (0, 1),
 (1, 1),
 (2, 1),
 (0, 2),
 (1, 2),
 (2, 2),
 (0, 3),
 (1, 3),
 (2, 3),
 (0, 4),
 (1, 4),
 (2, 4)]

In [195]:
# Build the 10 x 10 grid once and scatter its flattened x / y coordinates.
# This plots the same 100 points as stacking the two meshgrid outputs,
# reshaping to (2, -1) and star-unpacking the rows.
grid_x, grid_y = np.meshgrid(np.linspace(-1, 1, 10), np.linspace(-2, 2, 10))
plt.scatter(grid_x.ravel(), grid_y.ravel())


Out[195]:
<matplotlib.collections.PathCollection at 0x7fa534eaca90>

sort


In [196]:
a = np.array([[4, 3, 5], [1, 2, 1]])
a


Out[196]:
array([[4, 3, 5],
       [1, 2, 1]])

In [197]:
a[:,0]


Out[197]:
array([4, 1])

In [198]:
b = np.sort(a, axis=0)
b


Out[198]:
array([[1, 2, 1],
       [4, 3, 5]])

In [199]:
b = np.sort(a, axis=1)
b


Out[199]:
array([[3, 4, 5],
       [1, 1, 2]])

In [200]:
a = np.array([4, 3, 1, 2])
j = np.argsort(a)
j


Out[200]:
array([2, 3, 1, 0])

In [201]:
a[j]


Out[201]:
array([1, 2, 3, 4])

Array용 수학 함수

  • universal function
    • 빠른 element-wise (vectorized) 연산
  • 모든 NumPy/Scipy 수학 함수는 자동으로 vectorized 연산 수행

In [202]:
# Materialize a list so the displayed value is the list of numbers on both
# Python 2 and Python 3 (a bare range object would print as range(0, 10)).
x = list(range(10))
x


Out[202]:
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [203]:
import math
math.exp(x)


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-203-41425258536a> in <module>()
      1 import math
----> 2 math.exp(x)

TypeError: a float is required

In [205]:
math.exp(x[0])


Out[205]:
1.0

In [206]:
[math.exp(x_i) for x_i in x]


Out[206]:
[1.0,
 2.718281828459045,
 7.38905609893065,
 20.085536923187668,
 54.598150033144236,
 148.4131591025766,
 403.4287934927351,
 1096.6331584284585,
 2980.9579870417283,
 8103.083927575384]

In [207]:
np.exp(x)


Out[207]:
array([  1.00000000e+00,   2.71828183e+00,   7.38905610e+00,
         2.00855369e+01,   5.45981500e+01,   1.48413159e+02,
         4.03428793e+02,   1.09663316e+03,   2.98095799e+03,
         8.10308393e+03])

Random Number

numpy.random 서브패키지

  • seed: pseudo random 상태 설정
  • shuffle: 순열(permutation)
  • choice: 순열(permutation) 및 조합(combination)
  • rand: uniform
  • random_integers: uniform integer
  • randn: Gaussian normal

seed

  • 컴퓨터의 랜덤 생성은 사실 랜덤이 아니다.
  • 랜덤처럼 보이지만 정해진 알고리즘에 의해 생성되는 규칙적인 순열
  • 시작점이 정해지면 랜덤 함수를 사용해도 정해진 숫자가 나온다.

In [208]:
np.random.seed(0)
  • numpy.random.shuffle(x)
    • Parameters:
      • x : array_like
        • The array or list to be shuffled.

In [209]:
x = np.arange(10)
np.random.shuffle(x)
x


Out[209]:
array([2, 8, 4, 9, 1, 6, 7, 3, 0, 5])
  • numpy.random.choice(a, size=None, replace=True, p=None)
    • Parameters:
      • a : 1-D array-like or int
        • If an ndarray, a random sample is generated from its elements. If an int, the random sample is generated as if a were np.arange(a)
      • size : int or tuple of ints, optional
        • Output shape. If the given shape is, e.g., (m, n, k), then m * n * k samples are drawn. Default is None, in which case a single value is returned.
      • replace : boolean, optional
        • Whether the sample is with or without replacement
      • p : 1-D array-like, optional
        • The probabilities associated with each entry in a. If not given the sample assumes a uniform distribution over all entries in a.
    • Returns:
      • samples : 1-D ndarray, shape (size,)
        • The generated random samples

In [210]:
# same as shuffle
np.random.choice(5, 5, replace=False)


Out[210]:
array([1, 0, 2, 3, 4])

In [211]:
np.random.choice(5, 3, replace=False)


Out[211]:
array([3, 1, 2])

In [212]:
np.random.choice(5, 10)


Out[212]:
array([0, 1, 1, 0, 1, 4, 3, 0, 3, 0])

In [213]:
np.random.choice(5, 10, p=[0.1, 0, 0.3, 0.6, 0])


Out[213]:
array([3, 3, 3, 3, 2, 3, 2, 3, 3, 3])

In [214]:
x = np.random.rand(10000)
print(x[:10])
sns.distplot(x)


[ 0.26455561  0.77423369  0.45615033  0.56843395  0.0187898   0.6176355
  0.61209572  0.616934    0.94374808  0.6818203 ]
Out[214]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fa534e20ad0>

In [215]:
np.random.rand(3,2)


Out[215]:
array([[ 0.87423081,  0.47428492],
       [ 0.66369491,  0.88164841],
       [ 0.30465899,  0.89576302]])

In [216]:
# random_integers() is deprecated; randint() draws from the half-open range
# [low, high), so the upper bound is 101 to keep 100 inclusive as before.
x = np.random.randint(-100, 101, 50)
sns.distplot(x, rug=True)


Out[216]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fa534b701d0>

In [217]:
x = np.random.randn(1000)
sns.distplot(x, rug=True)


Out[217]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fa5347d1210>

In [218]:
np.random.randn(3,4)


Out[218]:
array([[-0.24084301, -0.6676534 ,  0.06900028,  0.45158204],
       [-0.23342743, -0.33696328, -0.49507512, -0.49871364],
       [-0.18672772,  0.24499567, -0.77532038, -0.23060432]])

random number count

  • discrete values
    • unique()
    • bincount()
  • continuous values
    • histogram()

In [219]:
np.unique([11, 11, 2, 2, 34, 34])


Out[219]:
array([ 2, 11, 34])

In [220]:
a = np.array([[1, 1], [2, 3]])
np.unique(a)


Out[220]:
array([1, 2, 3])

In [221]:
a = np.array(['a', 'b', 'b', 'c', 'a'])
index, count = np.unique(a, return_counts=True)

In [222]:
count


Out[222]:
array([2, 2, 1])

In [223]:
index


Out[223]:
array(['a', 'b', 'c'], 
      dtype='|S1')

In [224]:
np.bincount([1, 1, 2, 2, 3, 3], minlength=6)


Out[224]:
array([0, 2, 2, 2, 0, 0])

In [225]:
np.histogram([1.1, 2.5, 1.8, 2.4, 0.7], bins=[0, 1, 2, 3])


Out[225]:
(array([1, 2, 2]), array([0, 1, 2, 3]))

In [226]:
np.histogram([1, 2, 1], bins=[0, 1, 2, 3])


Out[226]:
(array([0, 2, 1]), array([0, 1, 2, 3]))

In [227]:
np.histogram([[1, 2, 1], [1, 0, 1]], bins=[0,1,2,3])


Out[227]:
(array([1, 4, 1]), array([0, 1, 2, 3]))

In [228]:
np.histogram(np.arange(4), bins=np.arange(5), density=True)


Out[228]:
(array([ 0.25,  0.25,  0.25,  0.25]), array([0, 1, 2, 3, 4]))